{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chapter 5: Handling Missing Values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Identifying missing values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import polars as pl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import date\n",
    "import numpy as np\n",
    "\n",
    "date_col = pl.date_range(date(2023, 1, 1), date(2023, 1, 15), '1d', eager=True)\n",
    "avg_temp_c_list = [-3,None,6,-1,np.nan,6,4,None,1,2,np.nan,7,9,-2,None]\n",
    "df = pl.DataFrame({\n",
    "    'date': date_col,\n",
    "    'avg_temp_celsius': avg_temp_c_list\n",
    "}, strict=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.null_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.select('avg_temp_celsius').null_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.select('date', 'avg_temp_celsius').null_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.select(pl.col('avg_temp_celsius').null_count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.select(\n",
    "    pl.col('avg_temp_celsius')\n",
    "    .is_null()\n",
    "    .sum()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "(\n",
    "    df\n",
    "    .filter(pl.col('avg_temp_celsius').is_null())\n",
    "    .select(pl.len())\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.filter(pl.col('avg_temp_celsius').is_null()).shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.select(\n",
    "    pl.col('avg_temp_celsius')\n",
    "    .is_nan()\n",
    "    .sum()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(\n",
    "    df\n",
    "    .filter(pl.col('avg_temp_celsius').is_nan())\n",
    "    .select(pl.len())\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deleting rows and columns containing missing values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import polars as pl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pl.read_csv('../data/temperatures.csv') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>date</th><th>avg_temp_celsius</th></tr><tr><td>str</td><td>f64</td></tr></thead><tbody><tr><td>&quot;2023-01-01&quot;</td><td>-3.0</td></tr><tr><td>&quot;2023-01-02&quot;</td><td>null</td></tr><tr><td>&quot;2023-01-03&quot;</td><td>6.0</td></tr><tr><td>&quot;2023-01-04&quot;</td><td>-1.0</td></tr><tr><td>&quot;2023-01-05&quot;</td><td>NaN</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 2)\n",
       "┌────────────┬──────────────────┐\n",
       "│ date       ┆ avg_temp_celsius │\n",
       "│ ---        ┆ ---              │\n",
       "│ str        ┆ f64              │\n",
       "╞════════════╪══════════════════╡\n",
       "│ 2023-01-01 ┆ -3.0             │\n",
       "│ 2023-01-02 ┆ null             │\n",
       "│ 2023-01-03 ┆ 6.0              │\n",
       "│ 2023-01-04 ┆ -1.0             │\n",
       "│ 2023-01-05 ┆ NaN              │\n",
       "└────────────┴──────────────────┘"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.drop_nulls().null_count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.select(\n",
    "    pl.col('avg_temp_celsius')\n",
    "    .drop_nulls()\n",
    "    .null_count()\n",
    ")    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.filter(pl.col('avg_temp_celsius').is_not_null())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols_to_drop = [column for column in df.columns if df.select(pl.col(column).is_null().any())[0,0]]\n",
    "df.drop(cols_to_drop).columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cols_to_drop = [column for column in df.columns if df.select(pl.col(column).is_null().any()).item()]\n",
    "df.drop(cols_to_drop).columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.select(\n",
    "    pl.col('avg_temp_celsius')\n",
    "    .drop_nans()\n",
    "    .is_nan()\n",
    "    .sum()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.filter(pl.col('avg_temp_celsius').is_not_nan())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import polars.selectors as cs\n",
    "cols_to_drop = df.select(cs.float().is_nan().any()).columns\n",
    "df.drop(cols_to_drop).columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.select(pl.exclude(cols_to_drop)).columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There is more..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.fill_nan(None).drop_nulls()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Filling missing values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (5, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>date</th><th>avg_temp_celsius</th></tr><tr><td>str</td><td>f64</td></tr></thead><tbody><tr><td>&quot;2023-01-01&quot;</td><td>-3.0</td></tr><tr><td>&quot;2023-01-02&quot;</td><td>null</td></tr><tr><td>&quot;2023-01-03&quot;</td><td>6.0</td></tr><tr><td>&quot;2023-01-04&quot;</td><td>-1.0</td></tr><tr><td>&quot;2023-01-05&quot;</td><td>NaN</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (5, 2)\n",
       "┌────────────┬──────────────────┐\n",
       "│ date       ┆ avg_temp_celsius │\n",
       "│ ---        ┆ ---              │\n",
       "│ str        ┆ f64              │\n",
       "╞════════════╪══════════════════╡\n",
       "│ 2023-01-01 ┆ -3.0             │\n",
       "│ 2023-01-02 ┆ null             │\n",
       "│ 2023-01-03 ┆ 6.0              │\n",
       "│ 2023-01-04 ┆ -1.0             │\n",
       "│ 2023-01-05 ┆ NaN              │\n",
       "└────────────┴──────────────────┘"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pl.read_csv('../data/temperatures.csv') \n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (15, 2)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>avg_temp_celsius</th><th>avg_temp_nulls_filled</th></tr><tr><td>f64</td><td>str</td></tr></thead><tbody><tr><td>-3.0</td><td>&quot;-3.0&quot;</td></tr><tr><td>null</td><td>&quot;1&quot;</td></tr><tr><td>6.0</td><td>&quot;6.0&quot;</td></tr><tr><td>-1.0</td><td>&quot;-1.0&quot;</td></tr><tr><td>NaN</td><td>&quot;NaN&quot;</td></tr><tr><td>6.0</td><td>&quot;6.0&quot;</td></tr><tr><td>4.0</td><td>&quot;4.0&quot;</td></tr><tr><td>null</td><td>&quot;1&quot;</td></tr><tr><td>1.0</td><td>&quot;1.0&quot;</td></tr><tr><td>2.0</td><td>&quot;2.0&quot;</td></tr><tr><td>NaN</td><td>&quot;NaN&quot;</td></tr><tr><td>7.0</td><td>&quot;7.0&quot;</td></tr><tr><td>9.0</td><td>&quot;9.0&quot;</td></tr><tr><td>-2.0</td><td>&quot;-2.0&quot;</td></tr><tr><td>null</td><td>&quot;1&quot;</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (15, 2)\n",
       "┌──────────────────┬───────────────────────┐\n",
       "│ avg_temp_celsius ┆ avg_temp_nulls_filled │\n",
       "│ ---              ┆ ---                   │\n",
       "│ f64              ┆ str                   │\n",
       "╞══════════════════╪═══════════════════════╡\n",
       "│ -3.0             ┆ -3.0                  │\n",
       "│ null             ┆ 1                     │\n",
       "│ 6.0              ┆ 6.0                   │\n",
       "│ -1.0             ┆ -1.0                  │\n",
       "│ …                ┆ …                     │\n",
       "│ 7.0              ┆ 7.0                   │\n",
       "│ 9.0              ┆ 9.0                   │\n",
       "│ -2.0             ┆ -2.0                  │\n",
       "│ null             ┆ 1                     │\n",
       "└──────────────────┴───────────────────────┘"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.select(\n",
    "    'avg_temp_celsius',\n",
    "    avg_temp_nulls_filled=pl.col('avg_temp_celsius').fill_null(pl.lit('1'))\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (15, 6)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>avg_temp_celsius</th><th>forward_filled</th><th>backward_filled</th><th>mean_filled</th><th>min_filled</th><th>max_filled</th></tr><tr><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>-3.0</td><td>-3.0</td><td>-3.0</td><td>-3.0</td><td>-3.0</td><td>-3.0</td></tr><tr><td>null</td><td>-3.0</td><td>6.0</td><td>NaN</td><td>-3.0</td><td>9.0</td></tr><tr><td>6.0</td><td>6.0</td><td>6.0</td><td>6.0</td><td>6.0</td><td>6.0</td></tr><tr><td>-1.0</td><td>-1.0</td><td>-1.0</td><td>-1.0</td><td>-1.0</td><td>-1.0</td></tr><tr><td>NaN</td><td>NaN</td><td>NaN</td><td>NaN</td><td>NaN</td><td>NaN</td></tr><tr><td>6.0</td><td>6.0</td><td>6.0</td><td>6.0</td><td>6.0</td><td>6.0</td></tr><tr><td>4.0</td><td>4.0</td><td>4.0</td><td>4.0</td><td>4.0</td><td>4.0</td></tr><tr><td>null</td><td>4.0</td><td>1.0</td><td>NaN</td><td>-3.0</td><td>9.0</td></tr><tr><td>1.0</td><td>1.0</td><td>1.0</td><td>1.0</td><td>1.0</td><td>1.0</td></tr><tr><td>2.0</td><td>2.0</td><td>2.0</td><td>2.0</td><td>2.0</td><td>2.0</td></tr><tr><td>NaN</td><td>NaN</td><td>NaN</td><td>NaN</td><td>NaN</td><td>NaN</td></tr><tr><td>7.0</td><td>7.0</td><td>7.0</td><td>7.0</td><td>7.0</td><td>7.0</td></tr><tr><td>9.0</td><td>9.0</td><td>9.0</td><td>9.0</td><td>9.0</td><td>9.0</td></tr><tr><td>-2.0</td><td>-2.0</td><td>-2.0</td><td>-2.0</td><td>-2.0</td><td>-2.0</td></tr><tr><td>null</td><td>-2.0</td><td>null</td><td>NaN</td><td>-3.0</td><td>9.0</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (15, 6)\n",
       "┌──────────────────┬────────────────┬─────────────────┬─────────────┬────────────┬────────────┐\n",
       "│ avg_temp_celsius ┆ forward_filled ┆ backward_filled ┆ mean_filled ┆ min_filled ┆ max_filled │\n",
       "│ ---              ┆ ---            ┆ ---             ┆ ---         ┆ ---        ┆ ---        │\n",
       "│ f64              ┆ f64            ┆ f64             ┆ f64         ┆ f64        ┆ f64        │\n",
       "╞══════════════════╪════════════════╪═════════════════╪═════════════╪════════════╪════════════╡\n",
       "│ -3.0             ┆ -3.0           ┆ -3.0            ┆ -3.0        ┆ -3.0       ┆ -3.0       │\n",
       "│ null             ┆ -3.0           ┆ 6.0             ┆ NaN         ┆ -3.0       ┆ 9.0        │\n",
       "│ 6.0              ┆ 6.0            ┆ 6.0             ┆ 6.0         ┆ 6.0        ┆ 6.0        │\n",
       "│ -1.0             ┆ -1.0           ┆ -1.0            ┆ -1.0        ┆ -1.0       ┆ -1.0       │\n",
       "│ …                ┆ …              ┆ …               ┆ …           ┆ …          ┆ …          │\n",
       "│ 7.0              ┆ 7.0            ┆ 7.0             ┆ 7.0         ┆ 7.0        ┆ 7.0        │\n",
       "│ 9.0              ┆ 9.0            ┆ 9.0             ┆ 9.0         ┆ 9.0        ┆ 9.0        │\n",
       "│ -2.0             ┆ -2.0           ┆ -2.0            ┆ -2.0        ┆ -2.0       ┆ -2.0       │\n",
       "│ null             ┆ -2.0           ┆ null            ┆ NaN         ┆ -3.0       ┆ 9.0        │\n",
       "└──────────────────┴────────────────┴─────────────────┴─────────────┴────────────┴────────────┘"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.select(\n",
    "    'avg_temp_celsius',\n",
    "    forward_filled=pl.col('avg_temp_celsius').fill_null(strategy='forward'),\n",
    "    backward_filled=pl.col('avg_temp_celsius').fill_null(strategy='backward'),\n",
    "    mean_filled=pl.col('avg_temp_celsius').fill_null(strategy='mean'),\n",
    "    min_filled=pl.col('avg_temp_celsius').fill_null(strategy='min'),\n",
    "    max_filled=pl.col('avg_temp_celsius').fill_null(strategy='max'),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (15, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>avg_temp_celsius</th><th>interpolated_linear</th><th>interpolated_nearest</th></tr><tr><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>-3.0</td><td>-3.0</td><td>-3.0</td></tr><tr><td>null</td><td>1.5</td><td>6.0</td></tr><tr><td>6.0</td><td>6.0</td><td>6.0</td></tr><tr><td>-1.0</td><td>-1.0</td><td>-1.0</td></tr><tr><td>NaN</td><td>NaN</td><td>NaN</td></tr><tr><td>6.0</td><td>6.0</td><td>6.0</td></tr><tr><td>4.0</td><td>4.0</td><td>4.0</td></tr><tr><td>null</td><td>2.5</td><td>1.0</td></tr><tr><td>1.0</td><td>1.0</td><td>1.0</td></tr><tr><td>2.0</td><td>2.0</td><td>2.0</td></tr><tr><td>NaN</td><td>NaN</td><td>NaN</td></tr><tr><td>7.0</td><td>7.0</td><td>7.0</td></tr><tr><td>9.0</td><td>9.0</td><td>9.0</td></tr><tr><td>-2.0</td><td>-2.0</td><td>-2.0</td></tr><tr><td>null</td><td>null</td><td>null</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (15, 3)\n",
       "┌──────────────────┬─────────────────────┬──────────────────────┐\n",
       "│ avg_temp_celsius ┆ interpolated_linear ┆ interpolated_nearest │\n",
       "│ ---              ┆ ---                 ┆ ---                  │\n",
       "│ f64              ┆ f64                 ┆ f64                  │\n",
       "╞══════════════════╪═════════════════════╪══════════════════════╡\n",
       "│ -3.0             ┆ -3.0                ┆ -3.0                 │\n",
       "│ null             ┆ 1.5                 ┆ 6.0                  │\n",
       "│ 6.0              ┆ 6.0                 ┆ 6.0                  │\n",
       "│ -1.0             ┆ -1.0                ┆ -1.0                 │\n",
       "│ …                ┆ …                   ┆ …                    │\n",
       "│ 7.0              ┆ 7.0                 ┆ 7.0                  │\n",
       "│ 9.0              ┆ 9.0                 ┆ 9.0                  │\n",
       "│ -2.0             ┆ -2.0                ┆ -2.0                 │\n",
       "│ null             ┆ null                ┆ null                 │\n",
       "└──────────────────┴─────────────────────┴──────────────────────┘"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.select(\n",
    "    'avg_temp_celsius',\n",
    "    interpolated_linear=pl.col('avg_temp_celsius').interpolate(),\n",
    "    interpolated_nearest=pl.col('avg_temp_celsius').interpolate(method='nearest')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (15, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>avg_temp_celsius</th><th>avg_temp_median</th><th>avg_temp_max_minus_min</th></tr><tr><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>-3.0</td><td>-3.0</td><td>-3.0</td></tr><tr><td>null</td><td>5.0</td><td>12.0</td></tr><tr><td>6.0</td><td>6.0</td><td>6.0</td></tr><tr><td>-1.0</td><td>-1.0</td><td>-1.0</td></tr><tr><td>NaN</td><td>NaN</td><td>NaN</td></tr><tr><td>6.0</td><td>6.0</td><td>6.0</td></tr><tr><td>4.0</td><td>4.0</td><td>4.0</td></tr><tr><td>null</td><td>5.0</td><td>12.0</td></tr><tr><td>1.0</td><td>1.0</td><td>1.0</td></tr><tr><td>2.0</td><td>2.0</td><td>2.0</td></tr><tr><td>NaN</td><td>NaN</td><td>NaN</td></tr><tr><td>7.0</td><td>7.0</td><td>7.0</td></tr><tr><td>9.0</td><td>9.0</td><td>9.0</td></tr><tr><td>-2.0</td><td>-2.0</td><td>-2.0</td></tr><tr><td>null</td><td>5.0</td><td>12.0</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (15, 3)\n",
       "┌──────────────────┬─────────────────┬────────────────────────┐\n",
       "│ avg_temp_celsius ┆ avg_temp_median ┆ avg_temp_max_minus_min │\n",
       "│ ---              ┆ ---             ┆ ---                    │\n",
       "│ f64              ┆ f64             ┆ f64                    │\n",
       "╞══════════════════╪═════════════════╪════════════════════════╡\n",
       "│ -3.0             ┆ -3.0            ┆ -3.0                   │\n",
       "│ null             ┆ 5.0             ┆ 12.0                   │\n",
       "│ 6.0              ┆ 6.0             ┆ 6.0                    │\n",
       "│ -1.0             ┆ -1.0            ┆ -1.0                   │\n",
       "│ …                ┆ …               ┆ …                      │\n",
       "│ 7.0              ┆ 7.0             ┆ 7.0                    │\n",
       "│ 9.0              ┆ 9.0             ┆ 9.0                    │\n",
       "│ -2.0             ┆ -2.0            ┆ -2.0                   │\n",
       "│ null             ┆ 5.0             ┆ 12.0                   │\n",
       "└──────────────────┴─────────────────┴────────────────────────┘"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.select(\n",
    "    'avg_temp_celsius',\n",
    "    avg_temp_median=pl.col('avg_temp_celsius')\n",
    "        .fill_null(\n",
    "            pl.col('avg_temp_celsius').median()\n",
    "        ),\n",
    "    avg_temp_max_minus_min=pl.col('avg_temp_celsius')\n",
    "        .fill_null(\n",
    "            pl.col('avg_temp_celsius').max() - pl.col('avg_temp_celsius').min()\n",
    "        )\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There is more..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (9, 1)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>values</th></tr><tr><td>i64</td></tr></thead><tbody><tr><td>1</td></tr><tr><td>2</td></tr><tr><td>null</td></tr><tr><td>null</td></tr><tr><td>null</td></tr><tr><td>3</td></tr><tr><td>4</td></tr><tr><td>null</td></tr><tr><td>5</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (9, 1)\n",
       "┌────────┐\n",
       "│ values │\n",
       "│ ---    │\n",
       "│ i64    │\n",
       "╞════════╡\n",
       "│ 1      │\n",
       "│ 2      │\n",
       "│ null   │\n",
       "│ null   │\n",
       "│ null   │\n",
       "│ 3      │\n",
       "│ 4      │\n",
       "│ null   │\n",
       "│ 5      │\n",
       "└────────┘"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pl.DataFrame(\n",
    "    {'values': [1,2,None,None,None,3,4,None,5]}\n",
    ")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (9, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>values</th><th>forward_fill</th><th>forward_fill_1</th><th>backward_fill</th><th>backward_fill_2</th></tr><tr><td>i64</td><td>i64</td><td>i64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>1</td><td>1</td><td>1</td><td>1</td><td>1</td></tr><tr><td>2</td><td>2</td><td>2</td><td>2</td><td>2</td></tr><tr><td>null</td><td>2</td><td>2</td><td>3</td><td>null</td></tr><tr><td>null</td><td>2</td><td>null</td><td>3</td><td>3</td></tr><tr><td>null</td><td>2</td><td>null</td><td>3</td><td>3</td></tr><tr><td>3</td><td>3</td><td>3</td><td>3</td><td>3</td></tr><tr><td>4</td><td>4</td><td>4</td><td>4</td><td>4</td></tr><tr><td>null</td><td>4</td><td>4</td><td>5</td><td>5</td></tr><tr><td>5</td><td>5</td><td>5</td><td>5</td><td>5</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (9, 5)\n",
       "┌────────┬──────────────┬────────────────┬───────────────┬─────────────────┐\n",
       "│ values ┆ forward_fill ┆ forward_fill_1 ┆ backward_fill ┆ backward_fill_2 │\n",
       "│ ---    ┆ ---          ┆ ---            ┆ ---           ┆ ---             │\n",
       "│ i64    ┆ i64          ┆ i64            ┆ i64           ┆ i64             │\n",
       "╞════════╪══════════════╪════════════════╪═══════════════╪═════════════════╡\n",
       "│ 1      ┆ 1            ┆ 1              ┆ 1             ┆ 1               │\n",
       "│ 2      ┆ 2            ┆ 2              ┆ 2             ┆ 2               │\n",
       "│ null   ┆ 2            ┆ 2              ┆ 3             ┆ null            │\n",
       "│ null   ┆ 2            ┆ null           ┆ 3             ┆ 3               │\n",
       "│ null   ┆ 2            ┆ null           ┆ 3             ┆ 3               │\n",
       "│ 3      ┆ 3            ┆ 3              ┆ 3             ┆ 3               │\n",
       "│ 4      ┆ 4            ┆ 4              ┆ 4             ┆ 4               │\n",
       "│ null   ┆ 4            ┆ 4              ┆ 5             ┆ 5               │\n",
       "│ 5      ┆ 5            ┆ 5              ┆ 5             ┆ 5               │\n",
       "└────────┴──────────────┴────────────────┴───────────────┴─────────────────┘"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.select(\n",
    "    'values',\n",
    "    forward_fill=pl.col('values').forward_fill(),\n",
    "    forward_fill_1=pl.col('values').forward_fill(limit=1),\n",
    "    backward_fill=pl.col('values').backward_fill(),\n",
    "    backward_fill_2=pl.col('values').backward_fill(limit=2),\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
